#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import MySQLdb
import time
import sys

time1 = time.time()

DATABASE_NAME = 'dbVar_20140604'
HOST = '10.10.31.17'
PORT = '3306'
USER_NAME = 'dbVar'
PASSWORD = 'dbVar'
CHAR_SET = 'utf8'

INFILE = sys.argv[1]
TABLE = sys.argv[2]
COUNT = 0

conn = MySQLdb.connect(host=HOST, user=USER_NAME, passwd=PASSWORD, db=DATABASE_NAME, charset=CHAR_SET)

inputfile = open(INFILE, "r")
for line in inputfile:
    line = line.rstrip("\n")
    if not line.startswith("#"):
        COUNT += 1
        splitline = line.split("\t")
        detail = {"ID": "", "Name": "", "Alias": "", "parent": "", "Dbxref": "", "var_origin": "",
                  "Start_range": "", "End_range": "", "copy_number": "", "Zygosity": "", "consequence": "",
                  "remapScore": "", "validated": "", "sample_name": "", "phenotype": "", "phenotype_id": "",
                  "gender": "", "variant_seq": "", "Reference_seq": ""}
        info = splitline[8]
        if not info == ".":
            splitinfo = info.split(";")
            if splitline[3] == "":
                splitline[3] = "0"
            if splitline[4] == "":
                splitline[4] = "0"
            for item in splitinfo:
                splititem = item.split("=")
                name = splititem[0]
                value = splititem[1]
                detail[name] = value
            if detail["ID"] == "":
                detail["ID"] = "0"
            if detail["copy_number"] == "":
                detail["copy_number"] = "0"
            if detail["remapScore"] == "":
                detail["remapScore"] = "0"
        cursor = conn.cursor()
        sql = "insert into " + TABLE + " values(0,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        params = (
            splitline[0], splitline[1], splitline[2], splitline[3], splitline[4], splitline[5], splitline[6],
            splitline[7],
            splitline[8], detail["ID"], detail["Name"], detail["Alias"], detail["parent"], detail["Dbxref"],
            detail["var_origin"], detail["Start_range"], detail["End_range"], detail["copy_number"], detail["Zygosity"],
            detail["consequence"], detail["remapScore"], detail["validated"],
            detail["sample_name"], detail["phenotype"], detail["phenotype_id"], detail["gender"], detail["variant_seq"],
            detail["Reference_seq"])
        result = cursor.execute(sql, params)
        conn.commit()
        cursor.close()
conn.close()
inputfile.close()

time2 = time.time()
timespend = time2 - time1

print sys.argv[1] + ": " + str(COUNT) + " lines in " + str(timespend) + "s"


